# -*- coding: cp1252 -*-
# Philipp 3/31/2014

from __future__ import print_function
import csv # writing csv files
import glob # to find pathnames matching a specific pattern
import re # regular expressions
import sys # command line input
import os # ordner-struktur

######################################################################################################
# Analyses minutes in Guatemala for several years. This file must be in the directory structure      #
# above the sub directories containing the minutes. Only .txt files are analysed, and only if they   #
# start with 'Gua'. No conversion from pdf to txt is done, i.e. the txt files must already be in the #
# minutes folder. Output is spreadsheets located in the output subdirectory. For every year two      #
# spreadsheets are generated called 'representativesYear.csv' and 'votesYear.csv'                    # 
######################################################################################################

### getting the parent directory and its level 1 subdirectories
# Everything below is resolved relative to the directory the script is
# started from (the current working directory).
parent_dir = os.getcwd()
# os.walk yields (dirpath, dirnames, filenames); only the top level is needed
content_lvl1 = next(os.walk(parent_dir))
sub_dirs1 = content_lvl1[1]
output_dir = parent_dir+'\\output'
# check if output directory exists, create if it doesn't
# (note: a successful chdir leaves the working directory inside output_dir)
try:
    os.chdir(output_dir)
except OSError:
    # catching OSError only, so that e.g. KeyboardInterrupt is not swallowed
    os.mkdir(output_dir)
# relevant subdirs: the part of the directory name starting at 'guatemala' is
# used; if several directories match, the last one listed wins
sub_dirs2 = None
for directory in sub_dirs1:
    _guatemala_match = re.search('guatemala.*',directory)
    if _guatemala_match:
        sub_dirs2 = _guatemala_match.group(0)
# fail with a clear message instead of a NameError on the next statement
if sub_dirs2 is None:
    raise RuntimeError("no sub directory matching 'guatemala' found in " + parent_dir)
lvl2_dir = content_lvl1[0] + '\\'+sub_dirs2
# level 2 content
content_lvl2 = next(os.walk(lvl2_dir))
# folders to iterate through (one folder of minutes per entry)
folder_list = content_lvl2[1]

# iterate over the list of minutes folders
for folder in folder_list:
    current_path = lvl2_dir + '\\' + folder
    # current year
    current_year = re.findall('[2][0][0-1][0-9]',current_path)
    current_year = str(current_year[0])
    # change working directory
    os.chdir(current_path)
    file_list = glob.glob('Gua*.txt') # all txt files in current dir that start with Gua
    file_counter = -1 # to index the current file (zero means first file)
    rollcalls_per_year = 0 # counting the total number of roll calls in a given year
    representatives_sheet = [] # information needed for the representatives sheet
    representatives_sheet2 = []
    filenames_year = ['FileName'] # names of files with roll call
    datum_year = ['Date']
    session_type_year = ['SessionType'] # session types for each roll call
    session_number_year = ['SessionNumber'] # session number for each roll call
    yes_year = ['Yes']
    no_year = ['No']
    absent_year = ['Absent']
    topic_year = ['Subject']
    votes_year = ['Vote']

    # importing the names dictionary
    os.chdir(parent_dir)
    dict_raw = open("Dictguatemala.txt").read()
    dict_names = dict_raw.split('\n')
    os.chdir(current_path)
                                
    # 1) loop the following over all those files 
    for file_idx, filename in enumerate(file_list):
        
        file_counter = file_counter + 1 # for indexing current file
        # getting the entire textfile into a string object
        raw_text = open(filename).read()
        # converting some special characters
        raw_text = raw_text.replace('\xdc','U') # print[u,'\xdc'][0] prints the character
        raw_text = raw_text.replace('\xd1','N')
        raw_text = raw_text.replace('\xe9','e')
        raw_text = raw_text.replace('\xf3','o')
        raw_text = raw_text.replace('\xe1','a')
        raw_text = raw_text.replace('\xed','i')
        raw_text = raw_text.replace('\xfc','u')
        raw_text = raw_text.replace('\xfa','u')
        raw_text = raw_text.replace('\xf1','n')
        # splitting text line by line
        # (for this to work txt has to have been created as accessible txt and not as plain in Adobe Acrobat)
        workable_text = raw_text.split('\n')
        # working on which year
        current_year = re.findall('[0-9][0-9][0-9][0-9]',filename); current_year = current_year[0]

        # setting some counters and lists and vars
        start_lines = [] # list will be filled with line numbers where roll calls start
        end_lines = [] # list will be filled with line numbers where roll calls end
        line_number = -1 # start here cause list index is the comma seperating the elemnts and zero means before the first
        line_number_two = -1 
        roll_call_number = 0 # counts the number of roll calls per session
        roll_call_text = [''] # will be filled with the actual text of the roll call
        file_name = ['FileName'] # Name of the file where we get the vote
        file_date = ['Date'] # Date on the front page of the session
        session_type = [] # On front page. Options are: ORDINARIA, EXTRAORDINARIA, SOLEMNE
        page = 1 # start at title page
        start_page = [] # returns the page on which the roll call starts
        end_page = [] # returns the page on which the roll call ends
        topic_list = [] # will include temporary text from the table of contexts including the topics discussed
        topic_list_two = [] # again text for topics
        topics = [] # only actual topic text
        topic_corresponding = [] # the number indicates which topic index corresponds to the respective roll call index
        yes_votes = []
        no_votes = []
        abstentions = []
        total_votes = []
        check_vote_sum = []
        representative_counter = 0
        representative_list = []
        add_to_reps_later = []
        representative_list_clean = []
        representative_list_list = []
        yay_names = []
        nay_names = []
        yay_list = []
        nay_list = []
        z1 = 0
        a1 = 0
        representative_check_list = []
        tempo = []
        nombres_list = []
        ambiguous_name = [] # stores the list position of names that are ambiguous
        name_check_list = [] # stores the position of the corresponding name's position
        name_found_list = [] # indicates if the representative was found in session
        name_found_list_list = []
        yay_list_start_list = []
        yay_list_end_list = []
        nay_list_start_list = []
        nay_list_end_list = []
        date = 0

        # getting a list of the representatives' names
        for MPidx, MP in enumerate(dict_names):
            # names by certain year
            if re.search(str(current_year),MP):
                for MP2 in dict_names[MPidx+1:]:
                    #print MP2
                # stop condition at next empty line
                    if MP2 == '':
                        break
                    # stripping accidental whitespaces, fullstops and colons from the dict file
                    MP2 = MP2.strip(); MP2 = MP2.strip('.'); MP2 = MP2.strip(':')
                    nombres_list.append(MP2)
                    name_found_list.append(0)

        # checking for ambiguous names
        for MPidx, MP in enumerate(nombres_list):
            for MPidx2, MP2 in enumerate(nombres_list):
                if MPidx != MPidx2:
                    # is the MP's name found in the name of another MP in the list
                    if re.search(MP,MP2):
                        # recoding the indices for this case
                        ambiguous_name.append(MPidx)
                        name_check_list.append(MPidx2)
                    
        # going line by line through entire workable text (the session document)
        for line in workable_text:
            line_number = line_number + 1
            rep_added = 0

            # counting pages
            if chr(12) in line:
                page = page + 1

            # looking for session type in the first 25 lines
            if line_number <= 24:
                if 'SESION' in line:
                    next_lines = str(workable_text[line_number]+workable_text[line_number+1])
                    if re.search('[O|S|E][R|O|X][D|L|T][I|E|R][N|M|A][A|N|O][R|E][^\s]*',next_lines):
                        x = re.findall('[O|S|E][R|O|X][D|L|T][I|E|R][N|M|A][A|N|O][R|E][^\s]*',next_lines)
                        session_type.extend(x)
                if 'NUMERO' in line:
                    next_lines = str(workable_text[line_number]+workable_text[line_number+1])
                    if re.search('\s+[0-9][0-9]?[0-9]?[\s*|$]',next_lines):
                        session_number = (re.findall('[0-9][0-9]?[0-9]?',next_lines))
                        session_number = map(int, session_number)

            # getting the date
            if line_number <= 100 and date == 0:
                mo = {1:'enero',2:'febrero',3:'marzo',4:'abril',5:'mayo',6:'junio',7:'julio',8:'agosto',9:'septiembre',10:'octubre',11:'noviembre',12:'diciembre'}
                if re.search('^Guatemala,.*(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre).*20[0|1][0-9]',line,re.IGNORECASE):
                    # converting month name into corresponding number
                    month = re.findall('(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre)',line,re.IGNORECASE)
                    day = re.findall('[^0-9][0-9][0-9]?[^0-9]',line)
                    year = re.findall('20[0|1][0-9]',line)
                    if len(day) == 1:
                        # day
                        day = str(day[0])
                        day = day.strip(','); day = day.strip('.'); day = day.strip('-'); day = day.strip()
                        if not re.search('[0-9][0-9]',day): day = '0'+day
                        # month
                        if re.search(mo[1],month[0],re.IGNORECASE): month[0] = '1'
                        if re.search(mo[2],month[0],re.IGNORECASE): month[0] = '2'
                        if re.search(mo[3],month[0],re.IGNORECASE): month[0] = '3'
                        if re.search(mo[4],month[0],re.IGNORECASE): month[0] = '4'
                        if re.search(mo[5],month[0],re.IGNORECASE): month[0] = '5'
                        if re.search(mo[6],month[0],re.IGNORECASE): month[0] = '6'
                        if re.search(mo[7],month[0],re.IGNORECASE): month[0] = '7'
                        if re.search(mo[8],month[0],re.IGNORECASE): month[0] = '8'
                        if re.search(mo[9],month[0],re.IGNORECASE): month[0] = '9'
                        if re.search(mo[10],month[0],re.IGNORECASE): month[0] = '10'
                        if re.search(mo[11],month[0],re.IGNORECASE): month[0] = '11' 
                        if re.search(mo[12],month[0],re.IGNORECASE): month[0] = '12'
                        month = str(month[0])
                        if not re.search('[0-9][0-9]',month): month = '0'+month
                        # year
                        year = str(year[0])
                        # final date
                        date = month+'/'+day+'/'+year

                    if len(day) != 1 and date == 0:
                        for zeile in workable_text[line_number+1:]:
                            if re.search('(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre).*20[0|1][0-9]',zeile,re.IGNORECASE):
                                month = re.findall('(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre)',zeile,re.IGNORECASE)
                                if len(month) == 1:
                                    if re.search(mo[1],month[0],re.IGNORECASE): month[0] = '1'
                                    if re.search(mo[2],month[0],re.IGNORECASE): month[0] = '2'
                                    if re.search(mo[3],month[0],re.IGNORECASE): month[0] = '3'
                                    if re.search(mo[4],month[0],re.IGNORECASE): month[0] = '4'
                                    if re.search(mo[5],month[0],re.IGNORECASE): month[0] = '5'
                                    if re.search(mo[6],month[0],re.IGNORECASE): month[0] = '6'
                                    if re.search(mo[7],month[0],re.IGNORECASE): month[0] = '7'
                                    if re.search(mo[8],month[0],re.IGNORECASE): month[0] = '8'
                                    if re.search(mo[9],month[0],re.IGNORECASE): month[0] = '9'
                                    if re.search(mo[10],month[0],re.IGNORECASE): month[0] = '10'
                                    if re.search(mo[11],month[0],re.IGNORECASE): month[0] = '11' 
                                    if re.search(mo[12],month[0],re.IGNORECASE): month[0] = '12'
                                    month = str(month[0])
                                    if not re.search('[0-9][0-9]',month): month = '0'+month
                                    # year
                                    year = re.findall('20[0|1][0-9]',zeile)
                                    year = str(year[0])
                                    # day
                                    day = re.findall('[^0-9][0-9][0-9]?[^0-9]',zeile)
                                    day = str(day[0])
                                    day = day.strip(','); day = day.strip('.'); day = day.strip()
                                    if not re.search('[0-9][0-9]',day): day = '0'+day
                                    #print (day)
                                    date = month+'/'+day+'/'+year
                                    break
                            if len(start_lines) > 0:
                                date = 'missing'
                                break
            
            # finding topics and creating a list of them
            if page < 7:
                # looking for a 1-2 digit number followed by a fullstop at the start of the line
                if re.search('^[0-9][0-9]?\.',line):
                    topic_lines = line
                    # iterating over the table of contents line by line
                    for follow_lines in workable_text[line_number + 1: len(workable_text)]:
                        # stop and append to topic list if a 1-2 digit number followed by a fullstop at begining of line is found or 'Pagina 2'
                        if re.search('^[0-9][0-9]?\.',follow_lines) or re.search('PAGINA\s+02',follow_lines):
                            topic_list.append(topic_lines)
                            break
                        # skip empty lines
                        if follow_lines.strip() == '':
                            continue
                        topic_lines = str(topic_lines + '\n' + follow_lines)

            # making the topic list more fine grained (looking for subtopics in topics)
            if line_number == len(workable_text)-1:
                # iterating over the list of topics
                for idx_element, list_element in enumerate(topic_list):
                    # splitting each topic line by line
                    t_txt = list_element.split('\n')
                    num_subtopic = 0
                    # going line by line through the topics
                    for idx_zeile, zeile in enumerate(t_txt):
                        # when in the first line of a new topic make that line part of string and go to next loop iteration
                        if idx_zeile == 0:
                            new_t_txt = zeile
                            continue
                        # more fine grained topic list
                        if re.search('^[0-9][0-9]?\.',zeile) or re.search('^[a-z][a-z]?\)',zeile):
                            if re.search('\(Pags\.',new_t_txt) or re.search('\(\s?Pag\.',new_t_txt):
                                num_subtopic = num_subtopic + 1
                                topic_list_two.append(new_t_txt)
                                new_t_txt = zeile
                            else:
                                new_t_txt = new_t_txt + '\n' + zeile
                        # not the end of the topic
                        else:
                            new_t_txt = new_t_txt + '\n' + zeile
                            # the last element in the list is always added (may be the only one if there was no subtopic)
                            if idx_zeile + 1  == len(t_txt):
                                topic_list_two.append(new_t_txt)
                    
                # getting page information form the topic list (i.e. the page where the topic ends)
                topic_pages_tmp = []
                topic_end_tmp = []
                topic_end = []
                # iterating over the fine grained topic list
                for list_element in topic_list_two:
                    t_txt = list_element.split('\n')
                    # iterating over the topic text line by line
                    for idx_zeile, zeile in enumerate(t_txt):
                        # looking for the page number 
                        if re.search('\(Pags\.',zeile):
                            # sometimes the page info stretches over 2 lines
                            if len(t_txt) > idx_zeile +1:
                                txt = t_txt[idx_zeile] + t_txt[idx_zeile+1]
                            else:
                                txt = t_txt[idx_zeile]
                            x = re.findall('\(Pags\..*\)',txt)
                            x = str(x[0])
                            # hyphenated range
                            if re.search('-',x): 
                                x = re.findall('-\n?[0-9][0-9]?[0-9]?',x)
                                x = str(x[0])
                                topic_end_tmp.append(x)
                                break
                            # no hyphen
                            if not re.search('-',x):
                                x = re.findall('[0-9][0-9]?[0-9]?',x)
                                x = str(x[0])
                                topic_end_tmp.append(x)
                                break
                        # alternative page numbering    
                        if re.search('\(\s?Pag\.',zeile):
                            # sometimes the page info stretches over 2 lines
                            if len(t_txt) > idx_zeile +1:
                                txt = t_txt[idx_zeile] + t_txt[idx_zeile+1]
                            else:
                                txt = t_txt[idx_zeile]
                            x = re.findall('\(\s?Pag\..*\)',txt)
                            x = str(x[0])
                            # hyphenated range
                            if re.search('-',x):
                                x = re.findall('-\n?[0-9][0-9]?[0-9]?',x)
                                x = str(x[0])
                                topic_end_tmp.append(x)
                                break
                            # no hyphen
                            if not re.search('-',x):
                                x = re.findall('[0-9][0-9]?[0-9]?',x)
                                x = str(x[0])
                                topic_end_tmp.append(x)
                                break
                        # when no page information found
                        if idx_zeile +1 == len(t_txt) and not re.search('\(\s?Pag\.',zeile) and not re.search('\(Pags\.',zeile):
                            topic_end_tmp.append('0')
                # end page
                for element in topic_end_tmp:
                    if not re.search('[0-9][0-9][0-9][0-9]',element):
                        x = re.findall('[0-9][0-9]?[0-9]?',element)
                        topic_end.extend(x)
                        topic_end = map(int, topic_end)     
                
            # phrase that indicates start of roll call
            if "Se les solicita a los honorables senores representantes poner debida atencion a su nombre" in line or \
               "Se les solicita a los honorables senores representantes poner debida atencion a sus nombres" in line or \
               "Se les solicita a los honorables senores representantes poner atencion a su nombre" in line or \
               "Se les solicita a los honorables representantes poner debida atencion a su nombre" in line:
                if (len(start_page) > 0 and not start_page[-1] == page) or (len(start_page) == 0):
                    # check further down the text if people actually start voting
                    for txt in workable_text[line_number+1:line_number+51]:
                        if re.search('a\s+favor',txt,re.IGNORECASE) or re.search('en\s+contra',txt,re.IGNORECASE):
                            start_lines.append(line_number)
                            roll_call_number = roll_call_number + 1
                            file_name.append(filename)
                            rollcalls_per_year = rollcalls_per_year + 1
                            start_page.append(page)
                            break
                
            # sometimes the trigger phrase stretches over multiple lines
            if not "Se les solicita a los honorables senores representantes poner debida atencion a su nombre" in line: # entire phrase not in 1 line
                if line_number + 2 < len(workable_text): # only do this if not in the last line of the workable text
                    if "Se" in line:
                        next_lines = str(workable_text[line_number]+workable_text[line_number+1])
                        if re.search('Se\s*les\s*solicita\s*a\s*los\s*honorables\s*senores\s*representantes\s*poner\s*debida\s*atencion\s*a\s*su\s*nombre',next_lines) or \
                           re.search('Se\s+les\s+solicita\s+a\s+los\s+honorables\s+senores\s+representantes\s+poner\s+debida\s+atencion\s+a\s+sus\s+nombres',next_lines) or \
                           re.search('Se\s+les\s+solicita\s+a\s+los\s+honorables\s+senores\s+representantes\s+poner\s+atencion\s+a\s+su\s+nombre',next_lines) or \
                           re.search('Se\s+les\s+solicita\s+a\s+los\s+honorables\s+representantes\s+poner\s+debida\s+atencion\s+a\s+su\s+nombre',next_lines):
                            if (len(start_page) > 0 and not start_page[-1] == page) or (len(start_page) == 0):
                                # check further down the text if people actually start voting
                                for txt in workable_text[line_number+1:line_number+51]:
                                    if re.search('a\s+favor',txt,re.IGNORECASE) or re.search('en\s+contra',txt,re.IGNORECASE):
                                        start_lines.append(line_number)
                                        roll_call_number = roll_call_number + 1
                                        file_name.append(filename)
                                        rollcalls_per_year = rollcalls_per_year + 1
                                        start_page.append(page)
                                        break

            # checking if it is really a new roll call or if there has been some interruption
            if len(start_page) > 1:
                #print(start_page)
                # iterating over the roll_calls
                for rollcall_idx, element in enumerate(start_page):
                    page_one = start_page[rollcall_idx]
                    if len(start_page)>= rollcall_idx+2:
                        page_two = start_page[rollcall_idx+1]
                        # the second roll call needs to start at least 5 pages after the first started
                        if not page_two >= page_one + 5:
                            # deleting the roll call that started first
                            del(start_page[rollcall_idx])
                            del(start_lines[rollcall_idx])
                            roll_call_number = roll_call_number - 1
                            representative_list = []; yay_votes =[]; nay_votes = []
                            
            # checking for total number of yes votes, no votes and abstentions
            if len(start_lines) > len(end_lines):
                # total yes votes
                if re.search('A\s+FAVOR:\s+[0-9][0-9]?[0-9]?',line,re.IGNORECASE):
                    x = re.findall('[0-9][0-9]?[0-9]?',line)
                    yes_votes.extend(x)
                    yes_votes = map(int, yes_votes)
                    # total no votes
                if re.search('EN\s+CONTRA:\s+[0-9][0-9]?[0-9]?',line,re.IGNORECASE):
                    x = re.findall('[0-9][0-9]?[0-9]?',line)
                    no_votes.extend(x)
                    no_votes = map(int, no_votes)
                    # total abstentions
                if re.search('(AUSENTES:\s+[0-9][0-9]?[0-9]?)|(DIPUTADOS\s+AUSENTES:?\s+[0-9][0-9]?[0-9]?)',line,re.IGNORECASE):
                    x = re.findall('[0-9][0-9]?[0-9]?',line)
                    abstentions.extend(x)
                    abstentions = map(int, abstentions)

                # 2nd phrases for yes, no, and abstain
                if re.search('(voatron)|(diputados)',line):
                    text = str(workable_text[line_number]+workable_text[line_number+1]+workable_text[line_number+2]+workable_text[line_number+3]\
                               +workable_text[line_number+4]+workable_text[line_number+5])
                    # yes votes
                    if len(abstentions) < len(start_lines):
                        if re.search('[0-9][0-9]?[0-9]?\s+diputados\s+votaron\s+a\s+favor',text):
                            x = re.findall('[0-9][0-9]?[0-9]?\s+diputados\s+votaron\s+a\s+favor',text)
                            x = str(x[0])
                            yes = re.findall('[0-9][0-9]?[0-9]?',x)
                            yes_votes.extend(yes)
                            yes_votes = map(int, yes_votes)
                        # no votes
                        if re.search('[0-9][0-9]?[0-9]?\s+diputados\s+votaron\s+en\s+contra',text):
                            x = re.findall('[0-9][0-9]?[0-9]?\s+diputados\s+votaron\s+en\s+contra',text)
                            x = str(x[0])
                            no = re.findall('[0-9][0-9]?[0-9]?',x)
                            no_votes.extend(no)
                            no_votes = map(int, no_votes)
                        # abstentions
                        if re.search('[0-9]?[0-9]?[0-9]?\s+diputados\s+ausentes',text):
                            x = re.findall('[0-9]?[0-9]?[0-9]?\s+diputados\s+ausentes',text)
                            x = str(x[0])
                            abstain = re.findall('[0-9][0-9]?[0-9]?',x)
                            abstentions.extend(abstain)
                            abstentions = map(int, abstentions)
                
                # total votes
                if len(total_votes) < len(start_lines):
                    # 1. phrase
                    if re.search('TOTAL:\s+[0-9][0-9][0-9]?\s*[^0-9]',line,re.IGNORECASE):
                        #print (line)
                        x = re.findall('[0-9][0-9]?[0-9]?',line)
                        total_votes.extend(x)
                        total_votes = map(int, total_votes)
                        #print (total_votes)
                    # 2. phrase
                    if re.search('TOTAL\s+DE\s+[0-9][0-9][0-9]?\s*[^0-9]',line,re.IGNORECASE):
                        #print (line)
                        x = re.findall('TOTAL\s+DE\s+[0-9][0-9][0-9]?\s*[^0-9]',line,re.IGNORECASE)
                        x = str(x[0])
                        total = re.findall('[0-9][0-9]?[0-9]?',x)
                        total_votes.extend(total)
                        total_votes = map(int, total_votes)
                        #print (total_votes)

            # when no total votes have been printed
            if len(start_lines) > 0 and start_lines[-1] == line_number: # if in line where roll call starts
                if not len(start_lines) - 1 == len(total_votes): # if startline vecotr is 2 greater than total votes vecotr
                    total_votes.append(999)
            
            # getting a list of representatives and of their votes
            if len(start_lines) > len(end_lines):
                # phrases that should (not) be in the line
                if re.search('EN\s+CONTRA',line,re.IGNORECASE) or \
                       re.search('A\s+FAVOR',line,re.IGNORECASE) or \
                       re.search('AUSENTE',line,re.IGNORECASE):
                        if not re.search('SECRETARI[O|A]',line) and not re.search('presidente',line):
                            if not re.search('VOTO\s+\ES',line,re.IGNORECASE) and not re.search('gracias',line):
                                # iterating through the representative names list
                                for idx, MP in enumerate(nombres_list):
                                        # checking if names appears in line
                                        if re.search(MP,line,re.IGNORECASE):
                                            # exception may occur for ambiguous names
                                            try:
                                                # checking if the name is an ambiguous name
                                                for pos, element in enumerate(ambiguous_name):
                                                    if int(element) == idx:
                                                        # checking that it's not the check name in the line
                                                        check_name = nombres_list[name_check_list[pos]]
                                                        check_name = check_name.split(' ')
                                                        for check_idx, check_element in enumerate(check_name):
                                                            check_name[check_idx] = check_element.strip(',')    
                                                        ambiguous = 0
                                                        for check_element in check_name:
                                                            if re.search(check_element,line,re.IGNORECASE):
                                                                ambiguous = ambiguous + 1
                                                            if len(check_name) == ambiguous:
                                                                raise NameError
                                            except NameError:
                                                continue
                                            # the correct name appears in the list 
                                            else:
                                                if re.search('EN\s+CONTRA',line,re.IGNORECASE) and not re.search('A\s+FAVOR',line,re.IGNORECASE):
                                                    x = MP+'; En contra; '+ str(line_number)
                                                    representative_list.append(x)
                                                    name_found_list[idx] = 1
                                                if re.search('A\s+FAVOR',line,re.IGNORECASE) and not re.search('EN\s+CONTRA',line,re.IGNORECASE):
                                                    x = MP+'; A favor; '+ str(line_number)
                                                    representative_list.append(x)
                                                    name_found_list[idx] = 1
                                                # when both a favor and en contra are in line, taking the one that comes later
                                                if re.search('EN\s+CONTRA',line,re.IGNORECASE) and re.search('A\s+FAVOR',line,re.IGNORECASE):
                                                    xy = re.findall('[A|E][\s|N][F|\s][A|C][V|O][O|N][R|T]R?A?',line,re.IGNORECASE)
                                                    x = xy[-1]
                                                    if re.search('A\s+FAVOR',x,re.IGNORECASE):                                                    
                                                        x = MP+'; A favor; '+ str(line_number)
                                                    if re.search('EN\s+CONTRA',x,re.IGNORECASE):
                                                        x = MP+'; En contra; '+ str(line_number)
                                                    representative_list.append(x)
                                                    name_found_list[idx] = 1
                                                if re.search('AUSENTE',line,re.IGNORECASE):
                                                    # at least in case the representative is first recorded as absent and then actually says something
                                                    # taking care of this here but could also be done in the cleaning list later
                                                    if not re.search('A\s+FAVOR',line,re.IGNORECASE) and not re.search('EN\s+CONTRA',line,re.IGNORECASE):
                                                        x = MP+'; Ausente; '+ str(line_number)
                                                        representative_list.append(x)
                                                        name_found_list[idx] = 1
                                                #print x

            # Secretary repeats a vote on this very line.
            # The vote is credited to the representative whose name is found first
            # when walking upwards from this line to the start of the open roll call.
            # The stored line number can later be used to extract the justification.
            if len(start_lines) > len(end_lines):
                # only lines where the secretary speaks and a vote phrase occurs
                if re.search('SECRETARI[O|A]',line) and (re.search('A\s+FAVOR',line,re.IGNORECASE) or re.search('EN\s+CONTRA',line,re.IGNORECASE)):
                    x = start_lines[-1]
                    # going up in the text, nearest line first
                    for zeile in reversed(workable_text[x:line_number]):
                        found = 0
                        for idx, MP in enumerate(nombres_list):
                            # the name has to be on a line that mentions neither PRESIDENTE nor
                            # the secretary: in GuaUni2004n15 a misspelled name would otherwise
                            # resolve to the president, while the real name appears further up
                            if not re.search(MP,zeile) or re.search('PRESIDENTE',zeile) or re.search('SECRETARI[O|A]',zeile):
                                continue
                            # ambiguous names: skip this MP when every part of the
                            # corresponding check name is present on the line as well
                            _is_check_name = False
                            for pos, element in enumerate(ambiguous_name):
                                if int(element) != idx:
                                    continue
                                check_name = [_part.strip(',') for _part in nombres_list[name_check_list[pos]].split(' ')]
                                ambiguous = 0
                                for check_element in check_name:
                                    if re.search(check_element,zeile,re.IGNORECASE):
                                        ambiguous = ambiguous + 1
                                    if len(check_name) == ambiguous:
                                        _is_check_name = True
                                        break
                                if _is_check_name:
                                    break
                            if _is_check_name:
                                continue
                            # the correct name was found; the line is known to contain at least
                            # one of the two phrases, and 'A favor' wins when both are present
                            if re.search('A\s+FAVOR',line,re.IGNORECASE):
                                x = MP+'; A favor; '+ str(line_number)
                            else:
                                x = MP+'; En contra; '+ str(line_number)
                            representative_list.append(x)
                            name_found_list[idx] = 1
                            found = 1
                            break
                        if found == 1:
                            break

            # The secretary is in line but repeats nothing there, because the repeated
            # vote is printed on one of the following lines.  The text below the
            # secretary line is scanned for a vote phrase; when one is found before any
            # representative name, the vote is credited to the representative found
            # first when walking upwards from the secretary line.
            if len(start_lines) > len(end_lines):
                if re.search('SECRETARI[O|A]',line):
                    if not re.search('A\s+FAVOR',line,re.IGNORECASE) and not re.search('EN\s+CONTRA',line,re.IGNORECASE): 
                        # a colon that is not followed by any ASCII letter on the rest of the line.
                        # The class is [A-Za-z]; the range A-z would also cover the punctuation
                        # characters that sit between 'Z' and 'a'.
                        if re.search(r':(?!.*[A-Za-z])',line):
                            stop = 0
                            # going down in the text to look for something the secretary said
                            for zeile_idx, zeile_txt in enumerate(workable_text[line_number+1:]):
                                # stop condition: finds a name before a vote 
                                for nombre_idx, MP in enumerate(nombres_list):
                                    if re.search(MP,zeile_txt):
                                        stop = 1
                                        break       
                                if stop == 1: break
                                # looking for in favor or against phrase
                                if re.search('A\s+FAVOR',zeile_txt,re.IGNORECASE) or re.search('EN\s+CONTRA',zeile_txt,re.IGNORECASE):
                                    # now going back up the text from the secretary line to see whose vote has been repeated
                                    x = start_lines[len(start_lines)-1]
                                    for zeile in reversed(workable_text[x:line_number]):
                                        found = 0
                                        # iterating through the representative names list and checking if he is in line
                                        for idx, MP in enumerate(nombres_list):
                                            # the name has to be on a line that mentions neither PRESIDENTE nor the
                                            # secretary: in GuaUni2004n15 a misspelled name would otherwise resolve
                                            # to the president, while the real name appears again further up
                                            if re.search(MP,zeile) and not re.search('PRESIDENTE',zeile) and not re.search('SECRETARI[O|A]',zeile):
                                                # NameError is raised on purpose to skip an ambiguous name
                                                try:
                                                    # checking if the name is an ambiguous name
                                                    for pos, element in enumerate(ambiguous_name):
                                                        if int(element) == idx:
                                                            # checking that it's not the check name in the line
                                                            check_name = nombres_list[name_check_list[pos]]
                                                            check_name = check_name.split(' ')
                                                            for check_idx, check_element in enumerate(check_name):
                                                                check_name[check_idx] = check_element.strip(',')    
                                                            ambiguous = 0
                                                            for check_element in check_name:
                                                                if re.search(check_element,zeile,re.IGNORECASE):
                                                                    ambiguous = ambiguous + 1
                                                                    # every part of the check name is on the line
                                                                    if len(check_name) == ambiguous:
                                                                        raise NameError
                                                except NameError:
                                                    continue
                                                # the correct name appears in the list 
                                                else:
                                                    # NOTE(review): zeile_idx counts from the line after the secretary
                                                    # line, so the stored number is one less than the index of the line
                                                    # holding the vote phrase - confirm this offset is intended
                                                    if re.search('EN\s+CONTRA',zeile_txt,re.IGNORECASE):
                                                        x = MP+'; En contra; '+ str(line_number+zeile_idx)
                                                    # when both phrases are present 'A favor' wins
                                                    if re.search('A\s+FAVOR',zeile_txt,re.IGNORECASE):
                                                        x = MP+'; A favor; '+ str(line_number+zeile_idx)
                                                    representative_list.append(x)
                                                    name_found_list[idx] = 1
                                                    found = 1
                                                    break
                                        if found == 1:
                                            break                

            # Somebody has indicated his vote to someone else ("INDICA ...").
            # Every representative named in the text from INDICA onwards gets the
            # vote phrase(s) found in that same text.
            if len(start_lines) > len(end_lines):
                # only lines containing the keyword INDICA (case sensitive)
                if re.search('INDICA',line):
                    # the statement may continue on the following line; the last line
                    # of the text has no successor and is therefore used on its own
                    if line_number+1 < len(workable_text):
                        qqtext = workable_text[line_number] + workable_text[line_number+1]
                    else:
                        qqtext = workable_text[line_number]
                    # keeping only the part matched by 'INDICA.*'
                    qqtext = re.findall('INDICA.*',qqtext)
                    qqtext = qqtext[0]
                    # iterating through the representative names list and checking if he is in the text
                    found = 0
                    for idx, MP in enumerate(nombres_list):
                        if re.search(MP,qqtext):
                            # NameError is raised on purpose to skip an ambiguous name
                            try:
                                # checking if the name is an ambiguous name
                                for pos, element in enumerate(ambiguous_name):
                                    if int(element) == idx:
                                        # checking that it's not the check name in the text
                                        check_name = nombres_list[name_check_list[pos]]
                                        check_name = check_name.split(' ')
                                        for check_idx, check_element in enumerate(check_name):
                                            check_name[check_idx] = check_element.strip(',')    
                                        ambiguous = 0
                                        for check_element in check_name:
                                            if re.search(check_element,qqtext,re.IGNORECASE):
                                                ambiguous = ambiguous + 1
                                                # every part of the check name is in the text
                                                if len(check_name) == ambiguous:
                                                    raise NameError
                            except NameError:
                                continue
                            # the correct name appears in the list 
                            else:
                                # both phrases are recorded when both are present
                                if re.search('EN\s+CONTRA',qqtext,re.IGNORECASE):
                                    x = MP+'; En contra; '+ str(line_number)
                                    representative_list.append(x)
                                    name_found_list[idx] = 1
                                    found = 1
                                if re.search('A\s+FAVOR',qqtext,re.IGNORECASE):
                                    x = MP+'; A favor; '+ str(line_number)
                                    representative_list.append(x)
                                    name_found_list[idx] = 1
                                    found = 1
                                # the first representative with a recorded vote ends the search
                                if found == 1: break
                            
            # Looking for 'se rectifica' cases: a vote is being corrected.
            # The corrected vote is read from this line plus the following one and is
            # credited to the representative(s) named on the nearest line above.
            if len(start_lines) > len(end_lines):
                if re.search('rectifica',line,re.IGNORECASE):
                    # text in which the corrected vote is looked for; the last line of
                    # the text has no successor and is therefore used on its own
                    if line_number+1 < len(workable_text):
                        checktext = line + workable_text[line_number+1]
                    else:
                        checktext = line
                    # going up in the text to see whose vote is being corrected
                    x = start_lines[len(start_lines)-1]
                    for zeile in reversed(workable_text[x:line_number]):
                        found = 0
                        # iterating through the representative names list and checking if he is in line
                        for idx, MP in enumerate(nombres_list):
                            # lines mentioning PRESIDENTE are skipped: in GuaUni2004n15 a misspelled
                            # name would otherwise resolve to the president, while the real name
                            # appears again further up
                            if re.search(MP,zeile) and not re.search('PRESIDENTE',zeile):
                                # NameError is raised on purpose to skip an ambiguous name
                                try:
                                    # checking if the name is an ambiguous name
                                    for pos, element in enumerate(ambiguous_name):
                                        if int(element) == idx:
                                            # checking that it's not the check name in the line
                                            check_name = nombres_list[name_check_list[pos]]
                                            check_name = check_name.split(' ')
                                            for check_idx, check_element in enumerate(check_name):
                                                check_name[check_idx] = check_element.strip(',')    
                                            ambiguous = 0
                                            for check_element in check_name:
                                                if re.search(check_element,zeile,re.IGNORECASE):
                                                    ambiguous = ambiguous + 1
                                                    # every part of the check name is on the line
                                                    if len(check_name) == ambiguous:
                                                        raise NameError
                                except NameError:
                                    continue
                                # the correct name appears in the list 
                                else:
                                    # when both phrases are present 'A favor' wins
                                    if re.search('EN\s+CONTRA',checktext,re.IGNORECASE):
                                        x = MP+'; En contra; '+ str(line_number)
                                        found = 1
                                    if re.search('A\s+FAVOR',checktext,re.IGNORECASE):
                                        x = MP+'; A favor; '+ str(line_number)
                                        found = 1
                                    if found == 1:
                                        representative_list.append(x)
                                        name_found_list[idx] = 1
                                    # deliberately no break: the remaining names on this line are checked too
                        # stop going up once a line yielded a vote
                        if found == 1:
                            break       

            # The legislator speaks and declares his vote himself.
            # Applies to lines that name a representative but contain no vote phrase,
            # no AUSENTE, no PRESIDENTE and no secretary; the following text is then
            # scanned for 'voto a favor' / 'voto en contra'.
            if len(start_lines) > len(end_lines):
                # iterating through the representative names list and checking if he is in line
                for idx, MP in enumerate(nombres_list):
                    # lines mentioning PRESIDENTE are skipped: in GuaUni2004n15 a misspelled
                    # name would otherwise resolve to the president, while the real name
                    # appears again further up
                    if not re.search('EN\s+CONTRA',line,re.IGNORECASE,) and not re.search('A\s+FAVOR',line,re.IGNORECASE) and not re.search('AUSENTE',line,re.IGNORECASE):
                        if re.search(MP,line) and not re.search('PRESIDENTE',line) and not re.search('SECRETARI[O|A]',line,re.IGNORECASE):
                            # NameError is raised on purpose to skip an ambiguous name
                            try:
                                # checking if the name is an ambiguous name
                                for pos, element in enumerate(ambiguous_name):
                                    if int(element) == idx:
                                        # checking that it's not the check name in the line
                                        check_name = nombres_list[name_check_list[pos]]
                                        check_name = check_name.split(' ')
                                        for check_idx, check_element in enumerate(check_name):
                                            check_name[check_idx] = check_element.strip(',')    
                                        ambiguous = 0
                                        for check_element in check_name:
                                            if re.search(check_element,line,re.IGNORECASE):
                                                ambiguous = ambiguous + 1
                                                # every part of the check name is on the line
                                                if len(check_name) == ambiguous:
                                                    raise NameError
                            except NameError:
                                continue
                            # the correct name appears in the list 
                            else:
                                found = 0
                                # going down in the text and looking if the representative declares his vote;
                                # qtext accumulates all lines read so far, so phrases split over lines still match
                                for i, newline in enumerate(workable_text[line_number+1:]):
                                    if i == 0: qtext = newline
                                    if i > 0: qtext = qtext + newline 
                                    # stop condition is: the secretary speaks again
                                    if re.search('SECRETARI[O|A]',qtext,re.IGNORECASE):
                                        break
                                    # these two phrases are matched case sensitively (lower case only)
                                    if re.search('voto\s+a\s+favor',qtext):
                                        xx = MP+'; A favor; '+ str(line_number)
                                        found = 1
                                    if re.search('voto\s+en\s+contra',qtext):
                                        xx = MP+'; En contra; '+ str(line_number)
                                        found = 1
                                    # NOTE(review): found is never reset and there is no break, so once a vote
                                    # was seen xx is appended again for every further line until the stop
                                    # condition; a later 'voto en contra' adds a second, different entry
                                    if found == 1:
                                        representative_list.append(xx)
                                        name_found_list[idx] = 1

            # Getting the list of representatives who voted yes - usually given right
            # after the voting procedure.  The heading 'que votaron a favor' may be
            # on this line or run over onto the next one; the lines after the heading
            # are collected in yay_names until a stop condition is met.
            if len(start_lines) > len(end_lines):
                if re.search('diputados',line) or re.search('representantes',line):
                    if re.search('que\s+votaron\s+a\s+favor',line):
                        zeilenzahl = line_number 
                        stext = line
                    else:           
                        zeilenzahl = line_number + 1
                        # the last line of the text has no successor to join with
                        if zeilenzahl < len(workable_text):
                            stext = line + ' ' + workable_text[zeilenzahl]
                        else:
                            stext = line
                    if re.search('que\s+votaron\s+a\s+favor',stext):
                        # set before the loop so that both are defined even when no
                        # line follows the heading
                        get_out = 0
                        index = 0
                        for index, zeile in enumerate(workable_text[zeilenzahl+1:]):
                            # false positive condition
                            if re.search('LA PALABRA POR EL ORDEN',zeile):
                                get_out = 1
                                break
                            # stop conditions
                            if re.search('[T|t][O|o][T|t][A|a][L|l]:\s+[0-9][0-9][0-9]?\s*[^0-9]',zeile): break
                            if re.search('TOTAL\s+DE\s+[0-9][0-9][0-9]?\s*[^0-9]',zeile,re.IGNORECASE):
                                if not re.search('Que\s+hacen\s+un\s+total\s+de',zeile): break
                            if re.search('[0-9]',zeile): break
                            if re.search('nombre',zeile): break
                            if re.search('contra\.*',zeile): break
                            # a name line has no parentheses and at least one word character.
                            # NOTE(review): the non-ASCII characters in the two patterns below are not
                            # readable in this view (presumably accented letters of the cp1252 source) -
                            # verify them against the original file
                            if not re.search('[\(|\)]',zeile) and re.search('[\w������]+',zeile):
                                if not re.search('tr�mite',zeile):
                                    yay_names.append(zeile)
                        # recording where the list starts and ends unless it was a false positive
                        if get_out == 0:
                            yay_list_start_list.append(zeilenzahl+1)
                            yay_list_end_list.append(zeilenzahl+index)

            # Getting the list of representatives who voted no.  The heading
            # 'que votaron en contra' may be on this line or run over onto the next
            # one; the lines after the heading are collected in nay_names until a
            # stop condition is met.
            if len(start_lines) > len(end_lines):
                if re.search('contra',line):
                    if re.search('que\s+votaron\s+en\s+contra',line):
                        zeilenzahl = line_number
                        stext = line
                    else:           
                        zeilenzahl = line_number + 1
                        # the last line of the text has no successor to join with
                        if zeilenzahl < len(workable_text):
                            stext = line + ' ' + workable_text[zeilenzahl]
                        else:
                            stext = line
                    if re.search('que\s+votaron\s+en\s+contra',stext):
                        # set before the loop so that it is defined even when no
                        # line follows the heading
                        index = 0
                        for index, zeile in enumerate(workable_text[zeilenzahl+1:]):
                            # stop conditions
                            if re.search('[T|t][O|o][T|t][A|a][L|l]:\s+[0-9][0-9][0-9]?\s*[^0-9]',zeile): break
                            if re.search('TOTAL\s+DE\s+[0-9][0-9][0-9]?\s*[^0-9]',zeile,re.IGNORECASE):
                                if not re.search('Que\s+hacen\s+un\s+total\s+de',zeile): break
                            if re.search('[0-9]',zeile): break
                            if re.search('votos',zeile,re.IGNORECASE): break
                            if re.search('resultado',zeile,re.IGNORECASE): break
                            if re.search('nombres',zeile) or re.search('favor',zeile): break
                            # a name line has no parentheses and at least one word character.
                            # NOTE(review): the non-ASCII characters in the two patterns below are not
                            # readable in this view (presumably accented letters of the cp1252 source) -
                            # verify them against the original file
                            if not re.search('[\(|\)]',zeile) and re.search('[\w������]+',zeile):
                                if not re.search('tr�mite',zeile):
                                    nay_names.append(zeile)
                        # recording where the list starts and ends
                        nay_list_start_list.append(zeilenzahl)
                        nay_list_end_list.append(zeilenzahl+index)
                                
            # Look for the end of the roll call while one is open.
            # First the end line numbers that apply to this line are collected,
            # then each of them closes one open roll call.
            if len(start_lines) > len(end_lines):
                _open_calls = len(start_lines) - len(end_lines)
                _closing_lines = []
                # phrase that indicates end of roll call
                if re.search('[T|t][O|o][T|t][A|a][L|l]:\s+[0-9][0-9][0-9]?\s*[^0-9]',line):
                    _closing_lines.append(line_number)
                # alternative phrase, only looked at while a roll call is still open
                if _open_calls > len(_closing_lines):
                    if re.search('TOTAL\s+DE\s+[0-9][0-9][0-9]?\s*[^0-9]',line,re.IGNORECASE):
                        if not (re.search('Que\s+hacen\s+un\s+total\s+de',line) or re.search('total\s+de\s+[0-9][0-9]?\s+centros',line)):
                            _closing_lines.append(line_number)
                # 3rd alternative: another starting point was already found, so the
                # roll call ends on the line before that start
                if _open_calls - len(_closing_lines) > 1:
                    _closing_lines.append(start_lines[-1]-1)
                for _closing_line in _closing_lines:
                    end_lines.append(_closing_line)
                    end_page.append(page)
                    # storing the lists of this roll call and starting fresh ones
                    # in case there's another session
                    representative_list_list.append(representative_list)
                    yay_list.append(yay_names); nay_list.append(nay_names)
                    yay_names = []; nay_names = []
                    representative_list = []
                    # a copy of the found flags is stored, then the flags are reset in place
                    name_found_list2 = list(name_found_list)
                    name_found_list_list.append(name_found_list2)
                    for i, e in enumerate(name_found_list):
                        name_found_list[i] = 0
                            
    ##################################################################################################################
    ## this part runs after the loop through the entire workable text is done but before moving on to the next file ##
    ##################################################################################################################
        
        #sys.exit('0')
        # In at least one case the necessary quorum for a roll call was not reached
        # and the roll call was aborted (example: p. 99 of GuaUni2005n69).  Such a
        # roll call has a registered start but no end; its start entries are dropped
        # and both counters are reduced by one.
        if len(end_page) < len(start_page):
            start_page, start_lines = start_page[:-1], start_lines[:-1]
            roll_call_number -= 1
            rollcalls_per_year -= 1

        # Checking which topic corresponds to each roll call: the first topic whose
        # end is not before the end page of the roll call.
        if roll_call_number > 0:
            # iterating over roll call 
            for idx_roll_call, rollcall in enumerate(start_page):
                match = 0
                # iterating over topic
                for idx_topic, topic in enumerate(topic_end):                    
                    if end_page[idx_roll_call] <= topic_end[idx_topic]: 
                        topic_corresponding.append(idx_topic)
                        # NOTE(review): the lookup uses the roll call index, which only points at the
                        # entry appended just above if topic_corresponding starts empty for this
                        # file - confirm
                        topic_year.append(topic_list_two[topic_corresponding[idx_roll_call]])
                        match = 1
                        break
                # when roll call can't be matched to a topic; testing match alone also
                # covers an empty topic_end, where idx_topic is never assigned
                if match == 0:
                    topic_year.append('no matching topic found')
                    topic_corresponding.append(99) # just to keep the spacing

        # Cleaning the representative lists.  Entries have the form
        # 'NAME; vote; source line'.  Per roll call, exact duplicates are dropped
        # and for every representative only the entries recorded at his highest
        # source line are kept.
        # This is done in one pass over a per-name maximum: deleting items from a
        # list while enumerating it skips elements, so repeated passes would be
        # needed and would still not guarantee a fully cleaned list.
        if roll_call_number > 0:
            for i, item in enumerate(representative_list_list):
                # removing duplicates, keeping the first occurrence of every entry
                seen_entries = set()
                representative_list_clean = []
                for e in item:
                    if e not in seen_entries:
                        seen_entries.add(e)
                        representative_list_clean.append(e)
                # highest source line at which each name was recorded
                latest_position = {}
                for e in representative_list_clean:
                    inhalt = e.split(';')
                    name = inhalt[0].strip()
                    position = int(inhalt[2].strip())
                    if name not in latest_position or position > latest_position[name]:
                        latest_position[name] = position
                # names list without duplicates
                names_list = list(latest_position)
                # actual cleaning: entries from an earlier line than the latest one of
                # the same name are dropped; entries sharing the latest line all stay
                representative_list_clean = [e for e in representative_list_clean
                                             if int(e.split(';')[2].strip()) == latest_position[e.split(';')[0].strip()]]
                # handing back the representative list
                representative_list_list[i] = representative_list_clean

        # Checking the lists one more time for known spelling mistakes in names.
        if roll_call_number > 0:
            # (pattern, corrected name) pairs, tried in this order.  Whenever a
            # pattern is found anywhere in the name, the whole name is replaced.
            # The BARILLAS pattern used to contain '\DE' (regex for "any non-digit
            # followed by E") where the literal word 'DE' was meant, and was
            # checked twice.
            spelling_fixes = [
                (r'MENDEZ\s+HEBRUGER', 'MENDEZ HERBRUGER'),
                (r'MARROQUIN\s+DE\s+PALOMO', 'MARROQUIN GODOY DE PALOMO'),
                (r'BARILLAS\s+DE\s+DUARTE', 'BARILLAS CARIAS DE DUARTE'),
                (r'MONTENEGRO\s+COTTON', 'MONTENEGRO COTTOM'),
            ]
            for i, item in enumerate(representative_list_list):
                representative_list_clean = item
                # iterating through the representatives list
                for ii, MP in enumerate(representative_list_clean):
                    # splitting the record into name, vote and source line
                    inhalt = MP.split(';')
                    name = inhalt[0].strip()
                    vote = inhalt[1].strip()
                    source = inhalt[2].strip()
                    # checking for name spelling mistakes
                    for pattern, corrected in spelling_fixes:
                        if re.search(pattern, name):
                            name = corrected
                    # putting the constituent parts back together (all stripped)
                    representative_list_clean[ii] = name+';'+vote+';'+source
                # Handing back the representative list.  This now happens for every
                # roll call; it used to sit after the loop and only ran for the
                # last index.
                representative_list_list[i] = representative_list_clean

    ##    # trying to find a voting choice for those legislators who are still not accounted for
    ##    # in the yes and no numbers list after the voting session
    ##    if roll_call_number > 0:
    ##        for i, item in enumerate(start_lines):
    ##            if len(representative_list_list[i]) != 158:
    ##                # getting the names found list & representatives lists
    ##                name_found_list = name_found_list_list[i]
    ##                representative_list_clean = representative_list_list[i]
    ##                # for each guy who is in the names found 
    ##                for ii, MP in enumerate(name_found_list):
    ##                    if MP == 0:
    ##                        # getting the name of the missing person
    ##                        missing_MP = nombres_list[ii]
    ##                        # getting the boundaries of the yes list
    ##                        start_yes = yay_list_start_list[i]
    ##                        end_yes = yay_list_end_list[i]                        
    ##                        # going through the part of the current roll call where the yes votes are declared
    ##                        for IDX_zeile, zeile in enumerate(workable_text[start_yes:end_yes]):
    ##                            if re.search(missing_MP,zeile,re.IGNORECASE):
    ##                                # ambiguous name check
    ##                                try:
    ##                                    # checking if the name is an amiguous name
    ##                                    for pos, element in enumerate(ambiguous_name):
    ##                                        if int(element) == ii:
    ##                                            # checking that it's not the check name in the line
    ##                                            check_name = nombres_list[name_check_list[pos]]
    ##                                            
    ##                                            # it's usually the full name now so, switching around the name order
    ##                                            check_name = check_name.split(',')
    ##                                            if len(check_name) == 2:
    ##                                                first_name = check_name[1]; first_name = first_name.strip()
    ##                                                last_name = check_name[0]; last_name = last_name.strip()
    ##                                                check_name = first_name + ' ' + last_name
    ##                                            else:
    ##                                                check_name = check_name[0]
    ##                                            if re.search(check_name,zeile,re.IGNORECASE):
    ##                                                raise NameError
    ##                                except NameError:
    ##                                    continue
    ##                                # the correct name appeas in the list 
    ##                                else:
    ##                                    #print 'bin hier'
    ##                                    #sys.exit('0')
    ##                                    pass
                                            
        # who's missing
        # Diagnostic pass over the name-found flags of every roll call.  A flag of
        # 0 marks an entry that was not found; at the moment nothing is done with
        # it (the print below is disabled), so this block has no output.
        # The former outer loop over start_lines only repeated this identical
        # scan once per roll call and has been removed.
        if roll_call_number > 0:
            for i, name_found_list in enumerate(name_found_list_list):
                representative_list_clean = representative_list_list[i]
                for j, guy in enumerate(name_found_list):
                    if guy == 0:
                        #print nombres_list[j], 'Found =',name_found_list[j], ' at index position:', j
                        pass

        # Adding this file's per-roll-call representative lists, in order, to the
        # yearly collection (one entry per list; nothing is added when there are none).
        representatives_sheet2.extend(representative_list_list)

        # log output - on screen and as file
        # Every message is printed to the console and, followed by an extra
        # '\r\n', to logfile<year>.txt in the output directory.  The same loop
        # also counts the votes of each roll call and feeds the yearly lists
        # (representatives_sheet, yes_year, no_year, absent_year).
        os.chdir(output_dir)
        # the first file of a year starts a fresh log, later files append to it
        if file_idx == 0:
            write_mode = "wb"
        else:
            write_mode = "a+b"
        logname = 'logfile'+current_year+'.txt'
        # One with-block replaces the former open / close / re-open sequence, whose
        # second handle was never closed explicitly.
        with open(logname,write_mode) as logfile:
            print (100*'#')
            print (100*'#'+"\r\n",file=logfile)
            print (filename)
            print (filename+"\r\n",file=logfile)
            print ('session type:', session_type[0], '/ session number:', int(session_number[0]), '/ pages:', page)
            print ('session type:', session_type[0], '/ session number:', int(session_number[0]), '/ pages:', page,'\r\n',file=logfile)
            print (roll_call_number, 'roll calls in this session')
            print (roll_call_number, 'roll calls in this session\r\n',file=logfile)
            for idx, a in enumerate(start_page):
                print (99*'-')
                print (99*'-'+'\r\n',file=logfile)
                print ('Roll call number', idx+1, 'in this session')
                print ('Roll call number', idx+1, 'in this session\r\n',file=logfile)
                print (start_lines[idx], 'Index (add 1 to get the line) where roll call starts. Page:', start_page[idx])
                print (start_lines[idx], 'Index (add 1 to get the line) where roll call starts. Page:', start_page[idx],'\r\n',file=logfile)
                print (end_lines[idx], 'Index (add 1 to get the line) where roll call ends.   Page:', end_page[idx])
                print (end_lines[idx], 'Index (add 1 to get the line) where roll call ends.   Page:', end_page[idx],'\r\n',file=logfile)
                try:
                    print ('Printed Results (if published), Yes:', yes_votes[idx], 'No:', no_votes[idx], 'Absentees:', abstentions[idx])
                    print ('Printed Results (if published), Yes:', yes_votes[idx], 'No:', no_votes[idx], 'Absentees:', abstentions[idx],'\r\n',file=logfile)
                except Exception:
                    # Best effort: the printed totals are optional, so a failing
                    # lookup just skips this line.  Unlike the former bare
                    # 'except:', KeyboardInterrupt and SystemExit still propagate.
                    pass
                representative_list_clean = representative_list_list[idx]
                yes = 0
                no = 0
                absent = 0
                for element in representative_list_clean:
                    # the keywords are searched in the whole record, not only in
                    # its vote field
                    if re.search('FAVOR',element,re.IGNORECASE):
                        yes = yes + 1
                    if re.search('CONTRA',element,re.IGNORECASE):
                        no = no + 1
                    if re.search('AUSENTE',element,re.IGNORECASE):
                        absent = absent + 1
                    # all the information for the representatives_sheet in one long list
                    representatives_sheet.append(element)
                print ('Votes counted. Yes:', yes,'No:', no,'Absent:', absent)
                print ('Votes counted. Yes:', yes,'No:', no,'Absent:', absent,'\r\n',file=logfile)
                # appending the totals to the yearly yes, no, absent lists used for the votes.csv
                yes_year.append(yes)
                no_year.append(no)
                absent_year.append(absent)
                print ('Alternative list. Yes:', len(yay_list[idx]), 'No:', len(nay_list[idx]))
                print ('Alternative list. Yes:', len(yay_list[idx]), 'No:', len(nay_list[idx]),'\r\n',file=logfile)
                print ('Number of Representatives:', len(representative_list_list[idx]), 'Total Number of Votes:', total_votes[idx])
                print ('Number of Representatives:', len(representative_list_list[idx]), 'Total Number of Votes:', total_votes[idx],'\r\n',file=logfile)
            print ('Total roll call count thus far this year:', rollcalls_per_year)
            print ('Total roll call count thus far this year:', rollcalls_per_year,'\r\n',file=logfile)
        # back to the folder holding the minutes
        os.chdir(current_path)

        # entire year variables needed for the votes.csv
        # Exactly one entry per roll call of this file goes into each yearly list,
        # so the columns zipped together for votes.csv stay aligned.  The session
        # type and number are taken from element [0], the same element the log
        # output reports; extend() used to add as many entries as those lists
        # happened to hold.
        for _ in start_page:
            filenames_year.append(filename)
            session_type_year.append(session_type[0])
            session_number_year.append(session_number[0])
            datum_year.append(date)

#######################################
# final output 1: representatives.csv #
#######################################
        # last file of the year
        # Builds the name x roll-call matrix from everything collected this year
        # and writes it to representatives<year>.csv in the output directory.
        if file_idx == len(file_list)-1:
            os.chdir(output_dir)
            # every distinct name (first ';' field of the collected records), A-Z
            final_names = sorted(set(
                element.split(';')[0] for element in representatives_sheet))

            # header: 'Names' followed by one column X_1 .. X_n per collected list
            header = ['Names']
            for idx, s in enumerate(representatives_sheet2):
                header.append('X_'+str(idx+1))

            # One name -> vote lookup per list, built in a single pass over its
            # records instead of re-scanning every list for every name.
            # Precedence is unchanged: within a record 'Contra' beats 'Favor'
            # beats 'Ausente', and a later record for the same name overrides an
            # earlier one.  Keywords are searched in the whole record.
            votes_per_sheet = []
            for sheet in representatives_sheet2:
                vote_of = {}
                for MP in sheet:
                    MP_name = MP.split(';')[0]
                    if re.search('Contra',MP,re.IGNORECASE):
                        vote_of[MP_name] = 'NO'
                    elif re.search('Favor',MP,re.IGNORECASE):
                        vote_of[MP_name] = 'YES'
                    elif re.search('Ausente',MP,re.IGNORECASE):
                        vote_of[MP_name] = 'ABSENT'
                votes_per_sheet.append(vote_of)

            # header row first, then one row per name; names without a recognised
            # vote in a list get an empty cell
            final_sheet = [header]
            for name in final_names:
                row = [name]
                for vote_of in votes_per_sheet:
                    row.append(vote_of.get(name, ''))
                final_sheet.append(row)

            # The former zip(*zip(*...)) round trip transposed the matrix twice,
            # which only turned the rows into tuples.  The name is still rebound
            # to the finished rows in case later code reads it.
            representatives_sheet = [tuple(row) for row in final_sheet]

            # writing representatives sheet to csv file
            # ("wb" is the Python 2 csv convention this script follows)
            sheet1_name = 'representatives'+current_year+'.csv'
            with open(sheet1_name,"wb") as f:
                fw = csv.writer(f)
                fw.writerows(representatives_sheet)

#############################
# final output 2: votes.csv #
#############################

            # doing the yearly votes identifier list: X_1 .. X_n, one per entry
            # in filenames_year
            for idx, item in enumerate(filenames_year):
                votes_year.append('X_'+str(idx+1))

            # putting together constituents of votes sheet, one row per roll call;
            # zip() stops at the shortest of the yearly lists
            votes_sheet = zip(votes_year,filenames_year,datum_year,session_type_year,topic_year,session_number_year,yes_year,no_year,absent_year)

            # writing the votes sheet to csv; the with-block closes the file even
            # if writing fails
            sheet2_name = 'votes'+current_year+'.csv'
            with open(sheet2_name,"wb") as f:
                fw = csv.writer(f)
                fw.writerows(votes_sheet)
            # back to the folder holding the minutes
            os.chdir(current_path)
